Attribute Information:

Date Time: Each ten minutes. Temperature: Weather Temperature of Tetouan city. Humidity: Weather Humidity of Tetouan city. Wind Speed of Tetouan city. general diffuse flows diffuse flows power consumption of zone 1 of Tetouan city. power consumption of zone 2 of Tetouan city. power consumption of zone 3 of Tetouan city.

rm(list = ls()) #initialization
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
library(lubridate)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:data.table':
## 
##     first, last
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(zoo)
library(Metrics)
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
library(ggplot2)
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(RColorBrewer)
# read the real data on July 1 for test
df <- read.csv("Tetuan City power consumption.csv")
colnames(df)
## [1] "DateTime"                  "Temperature"              
## [3] "Humidity"                  "Wind.Speed"               
## [5] "general.diffuse.flows"     "diffuse.flows"            
## [7] "Zone.1.Power.Consumption"  "Zone.2..Power.Consumption"
## [9] "Zone.3..Power.Consumption"
# check for missing values
sum(is.na(df$Zone1))
## [1] 0
colnames(df)[7] <- "Zone1"
colnames(df)[8] <- "Zone2"
colnames(df)[9] <- "Zone3"
date <- seq(from=as.POSIXct("2017-01-01 00:00", format = "%Y-%m-%d %H:%M"), length.out = nrow(df), by = "10 min")
df$DateTime <- date #df(df$DateTime, format="%m/%d/%Y %H:%M")
#attr(df$DateTime, "tzone") <- "Africa/Casablanca"
df_xts <- xts(df[,7], date) 

EDA

check stationarity

# ts plot and acf plot
var = colnames(df)
var = var[-1]
for(i in var) {       
  ts.plot(df[[i]], ylab=i)
  acf(df[[i]], ylab = paste0(i," ACF"), main="")
}

library(tseries)
for(i in var) {       
  print(adf.test(df[[i]])) # detects stationarity
  print(kpss.test(df[[i]],null = "Trend")) # detects trend stationarity
}
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df[[i]]
## Dickey-Fuller = -13.207, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  df[[i]]
## KPSS Trend = 42.545, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df[[i]]
## Dickey-Fuller = -20.358, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  df[[i]]
## KPSS Trend = 2.1044, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df[[i]]
## Dickey-Fuller = -7.717, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  df[[i]]
## KPSS Trend = 16.004, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df[[i]]
## Dickey-Fuller = -35.446, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  df[[i]]
## KPSS Trend = 4.9499, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df[[i]]
## Dickey-Fuller = -32.848, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  df[[i]]
## KPSS Trend = 1.3615, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df[[i]]
## Dickey-Fuller = -32.63, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  df[[i]]
## KPSS Trend = 6.6416, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df[[i]]
## Dickey-Fuller = -31.192, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  df[[i]]
## KPSS Trend = 2.089, Truncation lag parameter = 19, p-value = 0.01
## Warning in adf.test(df[[i]]): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df[[i]]
## Dickey-Fuller = -19.704, Lag order = 37, p-value = 0.01
## alternative hypothesis: stationary
## Warning in kpss.test(df[[i]], null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  df[[i]]
## KPSS Trend = 23.651, Truncation lag parameter = 19, p-value = 0.01
#perform frequency decomposition on our data to identify the strongest frequency signal
p <- periodogram(df$Zone1, xlim = c(0,0.06))

max_freq <- p$freq[which.max(p$spec)]
seasonality <- 1/max_freq
seasonality
## [1] 143.8027
#now let's find the remaining top frequencies
max_freqs <- p$freq[p$spec > 50000000000]
max_freqs
## [1] 1.905197e-05 3.810395e-05 6.915866e-03 6.934918e-03 6.953970e-03
## [6] 6.973022e-03 6.992074e-03 1.388889e-02 2.084286e-02

As noted above, there are certain frequencies that appear. The first two frequencies are so close to zero that they bear no meaning (frequency of 0 means an event does not repeat. We may get additional insights if we could obtain several years’ worth of data)

seasonalities <- 1/max_freqs
seasonalities
## [1] 52488.00000 26244.00000   144.59504   144.19780   143.80274   143.40984
## [7]   143.01907    72.00000    47.97806

As mentioned previously, the first two seasonalities are associated with frequencies that are close to zero and bear no meaning. Seasonalities close to 144 are for daily seasonality (six 10-minute periods per hour times 24 = 144). Seasonalities close to 72 are for semi-daily seasonality (12 hours, six 10-minute periods per hour times 12 = 72). Seasonalities close to 48 are for 8-hour frequencies (six 10-minute periods per hour times 8 = 48.)

plot(df$Zone1, type = "l", xlab = "Time", ylab = "Watt Hours", main = "Power Consumption of Tetouan City", col = "#4F94CD")
lines(df$Zone2, col = "#708090")
lines(df$Zone3, col = "#EEE5DE")

legend("topleft", c("Zone 1", "Zone 2","Zone 3"),
       lty = c(1,1),
       col = c("#4F94CD","#708090", "#EEE5DE"))

Correlation

cor_df <- df[, c(2,3,4,5,6,7)]
cor_mat <- round(cor(cor_df),2)
heatmap.2(cor_mat,Colv=NA,Rowv=NA,col=brewer.pal(9,"Blues"),cellnote=cor_mat,notecol="white", na.color=par("bg"),trace='none',density.info='none', margins = c(5, 5),cexRow=0.5,cexCol=0.5)
## Warning in heatmap.2(cor_mat, Colv = NA, Rowv = NA, col = brewer.pal(9, :
## Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting row dendogram.
## Warning in heatmap.2(cor_mat, Colv = NA, Rowv = NA, col = brewer.pal(9, :
## Discrepancy: Colv is FALSE, while dendrogram is `column'. Omitting column
## dendogram.

comp_daily <- decompose(ts(df$Zone1,frequency = 6*24))
plot(comp_daily)

comp_weekly <- decompose(ts(df$Zone1,frequency = 6*24*7))
plot(comp_weekly)

plot(df_xts['2017-01-01/2017-01-07'], type = "l", xlab = "Time", ylab = "Watt Hours", main = "One Week of Power Consumption of Zone 1 of Tetouan City ", col = "#4F94CD")

plot(df_xts['2017-01-01'], type = "l", xlab = "Time", ylab = "Watt Hours", main = "One Day of Power Consumption of Zone 1 of Tetouan City ", col = "#4F94CD")

plot(df_xts['2017-01-01/2017-01-30'], type = "l", xlab = "Time", ylab = "Watt Hours", main = "One Month of Power Consumption of Zone 1 of Tetouan City ", col = "#4F94CD")